
* Positional arguments of this do-file:
*   `1'       project root directory; config.do is included from below it
*   `2'-`5'   optional values for the globals weight_category, weight_versions,
*             weight_window and wtype. The literal ___EMPTY___ stands for
*             "argument not supplied".
global root_dir = "`1'"

include "$root_dir/code/config/config.do"

* A log named "dat" left open by an interrupted earlier run would make the
* -log using- below fail because the name is already taken; close it first.
cap log close dat

* Path is quoted so that a log directory containing spaces still works.
* NOTE(review): log_dir is presumably defined by config.do -- confirm.
cap noi log using "${log_dir}/refined_placebos.log", replace name(dat)

*Handle empty arguments: map the ___EMPTY___ placeholder to an empty string
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

* Each global below is only (re)set when its argument is non-empty; otherwise
* whatever value it already holds is left untouched.
if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
	global wtype "$arg4"
}
* wtype is echoed unconditionally (prints an empty line when it is unset)
di "${wtype}"
capture noi {

* v2
* --------------------------------- *
* --------------------------------- *
* Build refined placebos
* --------------------------------- *
* --------------------------------- *

/*
This do-file builds "refined" placebos.

Namely, pauto90 / pauto95 patents
that have at least one [machinery] IPC4 code that
i) is an auto95 code when combined with another IPC4 code (ipc4_pair),
ii) is an auto95 code when combined with G05/G06,
iii) has a 6-digit automation code,
or i-iii) combined.
*/



* classify_machinery
* ------------------
* Works on the data in memory, which must contain the string variables
* techn_sector, techn_field and cipc6.
*
* Creates:
*   machinery   1 for rows in the "Mechanical engineering" sector whose field
*               is one of four machinery fields, adjusted by the IPC-based
*               exclusions/inclusions below; 0 otherwise.
* Modifies:
*   techn_field set to "non-machinery" where machinery == 0 and then to
*               "non-classified" for codes starting with "Y".
*
* The IPC prefixes are held in temporary variables, so the program does not
* fail when the caller's data already contain variables named ipc1, ipc3 or
* ipc4, and it never drops a caller-owned variable of that name.
cap program drop classify_machinery
program classify_machinery
    tempvar sec1 cls3 sub4
    gen `sec1' = substr(cipc6, 1, 1)
    gen `cls3' = substr(cipc6, 1, 3)
    gen `sub4' = substr(cipc6, 1, 4)

    * baseline: machinery fields within mechanical engineering
    gen machinery = techn_sector == "Mechanical engineering" & (techn_field == "Handling" ///
    | techn_field == "Machine tools" | techn_field == "Other special machines" ///
    | techn_field == "Textile and paper machines")

    * IPC classes excluded from machinery
    replace machinery = 0 if `cls3' == "F41" | `cls3' == "F42"

    * IPC subclasses / codes added to machinery regardless of field
    replace machinery = 1 if `sub4' == "B42C"
    replace machinery = 1 if `sub4' == "B07C"
    replace machinery = 1 if cipc6 == "G05B19"
    * NOTE(review): "G05B2219" is 8 characters long; if cipc6 never holds more
    * than 6 characters this line cannot match anything -- confirm.
    replace machinery = 1 if cipc6 == "G05B2219"
    replace machinery = 1 if cipc6 == "B62D65"

    * section-Y codes must never end up as machinery
    assert machinery == 0 if `sec1' == "Y"
    tab techn_field if machinery == 1

    replace techn_field = "non-machinery" if machinery == 0
    replace techn_field = "non-classified" if `sec1' == "Y"
end


* ------------------------------------------ *
* Find 4-digit codes of classified auto95
* ------------------------------------------ *
* Builds three lookup tempfiles keyed on ipc4, each carrying its own marker
* variable. They are merged onto the family-tech data further below through
* the tempfile macros ipc4_auto95_ipc6XX, ipc4_auto95_ipc4_pairs and
* ipc4_auto95_ipc4. File paths are quoted so directories with spaces work.

* 4-digit prefixes of the auto95 ipc6XX codes
import delim "${classification_dir}/V6/classified_auto95_ipc6XX_n.csv", varnames(1) clear
gen ipc4 = substr(ipc6, 1, 4)
gen ipc4_auto95_ipc6XX = 1
keep ipc4 ipc4_auto95_ipc6XX
duplicates drop
tempfile ipc4_auto95_ipc6XX
save `ipc4_auto95_ipc6XX', replace


* ipc4 codes that are part of auto95 ipc4_pairs:
* stack the first (ipc1) and second (ipc2) member of every pair
import delim "${classification_dir}/V6/classified_auto95_ipc4_pairs_n.csv", varnames(1) clear
keep ipc1 
ren ipc1 ipc4
tempfile ipc4_pairs_1 
save `ipc4_pairs_1', replace

import delim "${classification_dir}/V6/classified_auto95_ipc4_pairs_n.csv", varnames(1) clear
ren ipc2 ipc4 
append using `ipc4_pairs_1'
keep ipc4
gen ipc4_auto95_ipc4_pairs = 1
duplicates drop
tempfile ipc4_auto95_ipc4_pairs
save `ipc4_auto95_ipc4_pairs', replace


* ipc4 codes that are auto95 when combined with G05/G06.
* The marker is named ipc4_auto95_ipc4 (it used to reuse the ipc6XX marker
* name), so the three lookups no longer share a non-key variable name when
* they are merged onto the same dataset one after another.
import delim "${classification_dir}/V6/classified_auto95_ipc4_n.csv", varnames(1) clear
gen ipc4_auto95_ipc4 = 1
keep ipc4 ipc4_auto95_ipc4
duplicates drop
tempfile ipc4_auto95_ipc4
save `ipc4_auto95_ipc4', replace


* ------------------------------------------ *
* Define the refined placebo vars
* ------------------------------------------ *
* Steps: pauto95/pauto90 application lists -> family level -> family x CIPC
* code level (flag auto95 ipc4 codes, with and without the machinery
* restriction) -> back to one row per family -> refined placebo flags ->
* application-level lists saved for the "main" versions (rm, rm6).
* File paths are quoted so that directories containing spaces work.

* prepare pauto90/95 - CIPC fam-level dataset
use "${dataset_dir}/patent_list/pauto95_patents.dta", clear
gen appln_pauto95 = 1

* pauto90 is a smaller subset of pauto95, so the assert holds every time
mmerge appln_id using "${dataset_dir}/patent_list/pauto90_patents.dta"
gen appln_pauto90 = (_m == 3)
assert _m != 2
drop _m

* go to family level
mmerge appln_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(docdb_family_id)
mmerge docdb_family_id using "${dataset_dir}/patstat_orbis/docdb_families2.dta", unmatched(master) ukeep(biadic_D)
ren biadic_D bia
drop _m
* a family is pauto90/95 if any of its applications is
bys docdb_family_id: egen pauto90 = max(appln_pauto90)
bys docdb_family_id: egen pauto95 = max(appln_pauto95)
drop appln_id appln_pauto90 appln_pauto95
duplicates drop

* go to family-tech level
mmerge docdb_family_id using "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", unmatched(master)
gen cipc4 = substr(cipc6,1,4)

* merge in the technical field description via the cipc codes
* (the 4-character prefix is renamed to the key name used for this merge)
ren cipc4 ipc_maingroup_symbol
mmerge ipc using "${commondata_dir}/patstat_2018b/ipc_techn_field.dta", unmatched(master)
classify_machinery
gen ipc4=substr(cipc6, 1, 4)
sort docdb cipc6

* merge tech level with the 4-digit auto95 code lists and,
* per list, count the flagged codes of every docdb family:
*   is_iauto95<class>   row carries an ipc4 from the list
*   is_imauto95<class>  same, and the row is classified as machinery
*   tot_*               family-level sums of the two flags
foreach class in ipc4 ipc6XX ipc4_pairs {
    mmerge ipc4 using "`ipc4_auto95_`class''", unmatched(master)
    gen is_iauto95`class' = (_m == 3)
    gen is_imauto95`class' = (_m == 3 & machinery == 1)
    drop _m
    bys docdb: egen tot_is_iauto95`class' = sum(is_iauto95`class')
    bys docdb: egen tot_is_imauto95`class' = sum(is_imauto95`class')
}
* totals across the three lists
egen rowtot_is_iauto95 = rowtotal(is_iauto95*)
egen rowtot_is_imauto95 = rowtotal(is_imauto95*)
bys docdb: egen tot_is_iauto95 = sum(rowtot_is_iauto95)
bys docdb: egen tot_is_imauto95 = sum(rowtot_is_imauto95)

* reduce to family level
* Keep only variables that are constant within a family. Dropping just
* ipc4/cipc6/ipc_maingroup_symbol would leave row-varying variables
* (techn_field, machinery, is_*, rowtot_*, ...) behind, so -duplicates drop-
* would not collapse to one row per family and the family -> appln_id merge
* below would have a non-unique key on both sides.
keep docdb_family_id bia pauto90 pauto95 tot_is_*
duplicates drop

* define refined placebos for pauto90/95
*   _r<class>   family is pauto<q> and has >= 1 flagged ipc4 code
*   _rm<class>  same, counting only flagged codes on machinery rows
foreach q in 90 95 {
    foreach class in ipc4 ipc6XX ipc4_pairs {
        gen pauto`q'_r`class' = (tot_is_iauto95`class' > 0 & pauto`q' == 1)
        gen pauto`q'_rm`class' = (tot_is_imauto95`class' > 0 & pauto`q' == 1)
    }
    gen pauto`q'_r = (tot_is_iauto95 > 0 & pauto`q' == 1)
    gen pauto`q'_rm = (tot_is_imauto95 > 0 & pauto`q' == 1)
}

* go on appln_id level and output the lists to fit the dep_var 
* & spill data construction pipeline
* (pvars_all and pvers_all are kept for reference; nothing below uses them)
local pvars_all pauto90_r pauto90_rm pauto90_ripc4 pauto90_rmipc4 ///
pauto90_ripc4_pairs pauto90_rmipc4_pairs pauto90_ripc6XX pauto90_rmipc6XX ///
pauto95_r pauto95_rm pauto95_ripc4 pauto95_rmipc4 pauto95_ripc4_pairs ///
pauto95_rmipc4_pairs pauto95_ripc6XX pauto95_rmipc6XX

* spill_pauto90_rmipc6XX_bia_WD_1995, which would be part of the pipeline
* later, breaks the 32-character variable-name limit, hence the shorter
* rm6 name below

local pvers_all r ripc4 ripc4_pairs ripc6XX rm rmipc4 rmipc4_pairs rmipc6XX

ren pauto90_rmipc6XX pauto90_rm6
ren pauto95_rmipc6XX pauto95_rm6
local pvers_main rm rm6

* attach every application of each family, then save one appln_id list per
* (q, version) combination
mmerge docdb_family_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(appln_id)
foreach q in 90 95 { 
    foreach pvar of local pvers_main {
        preserve
        keep if pauto`q'_`pvar' == 1
        keep appln_id
        duplicates drop
        save "${dataset_dir}/patent_list/pauto`q'_`pvar'_patents.dta", replace
        restore
    }
}

}
* Report the outcome of the captured main block: _rc holds the return code
* left by the -capture- that ends on the line above (nonzero = a command failed).
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* close the named log whether or not the run succeeded
cap log close dat